clusterProfiler
Link a la vignette: https://yulab-smu.github.io/clusterProfiler-book/index.html
# Note: ChIPseeker may also be worth a look.

# Load the annotatr results table.
dataAnnotatr <- read.csv(
  file.path(dataPath, dirO.7, "annotatr_results_E1_0.7.csv")
)

# Gene table (Entrez ID + symbol), one row per annotation row.
allGenesDF <- data.frame(
  ENTREZID = dataAnnotatr$annot.gene_id,
  SYMBOL = dataAnnotatr$annot.symbol
)
# Alternative kept for reference: map symbols to other ID types with bitr().
# allGenesDF <- bitr(allGenes, fromType = "SYMBOL",
#                    toType = c("ENSEMBL", "ENTREZID"),
#                    OrgDb = org.Hs.eg.db)

# clusterProfiler: GO over-representation, one enrichGO() run per ontology.
# The result has to be called `ggo1`, because the plotting code reads
# `ggo1$MF`. `simplify = FALSE` guarantees a list named by ontology
# ("MF", "CC", "BP") regardless of what each run returns.
ggo1 <- sapply(c("MF", "CC", "BP"), function(term) {
  enrichGO(
    gene = unique(allGenesDF$ENTREZID),
    OrgDb = org.Hs.eg.db,
    ont = term,
    qvalueCutoff = 0.05,
    readable = TRUE
  )
}, simplify = FALSE)
# Molecular Function (MF) enrichment of all annotated genes:
# dot plot and bar plot of the top 20 terms, plus an UpSet plot.
dotplot(ggo1[["MF"]], font.size = 8, showCategory = 20)
barplot(ggo1[["MF"]], font.size = 8, showCategory = 20)
upsetplot(ggo1[["MF"]])
# GO graph plot, currently disabled.
# goplot(ggo1)
Tabla de resultados
# Interactive table with the full `result` slot of the MF enrichment.
DT::datatable(ggo1[["MF"]]@result)

# Run simplify() on the MF result and redraw the same two plots.
ggo1MFsimplified <- simplify(ggo1[["MF"]])
dotplot(ggo1MFsimplified, font.size = 8, showCategory = 20)
barplot(ggo1MFsimplified, font.size = 8, showCategory = 20)
Link datos: http://bio-bigdata.hrbmu.edu.cn/CellMarker
# Read the CellMarker table of human cell markers.
# Path components are passed separately so file.path() does not produce a
# doubled separator, and strings are kept as character so strsplit() below
# also works on R versions that default to factors.
dataHumanCells <- read.delim(
  file.path(analysisPath, "data", "Human_cell_markers.txt"),
  stringsAsFactors = FALSE
)

# Build the TERM2GENE table: one label per tissue/cancer/cell-name
# combination, with geneID split into a list column of individual IDs.
dataHumanCells <- dataHumanCells %>%
  tidyr::unite("cellMarker", tissueType, cancerType, cellName, sep = ", ") %>%
  dplyr::select(cellMarker, geneID) %>%
  dplyr::mutate(geneID = strsplit(geneID, ", "))
Mediante la función enricher podemos ver el enriquecimiento de genesets
propios en nuestro conjunto de genes. Es la misma base que con las anotaciones
GO, pero aquí estamos construyendo nosotros la base de datos.
# Over-representation of the custom cell-marker gene sets among all
# annotated genes.
typeCells <- enricher(
  gene = unique(allGenesDF$ENTREZID),
  TERM2GENE = dataHumanCells,
  pAdjustMethod = "fdr",
  minGSSize = 5,
  qvalueCutoff = 0.2
)

# Top 10 categories as dot plot and bar plot (x axis: GeneRatio), an UpSet
# plot, and the full result table.
dotplot(typeCells, font.size = 8, showCategory = 10)
barplot(typeCells, x = "GeneRatio", font.size = 8, showCategory = 10)
upsetplot(typeCells)
DT::datatable(typeCells@result)
Subset con células de sangre
# Re-read the CellMarker table (path components passed separately; strings
# kept as character so strsplit() below works on any R version).
dataBloodCells <- read.delim(
  file.path(analysisPath, "data", "Human_cell_markers.txt"),
  stringsAsFactors = FALSE
)

# Tissue types in which at least one cell name mentions monocytes.
monocytesInfo <- unique(
  dataBloodCells$tissueType[grepl(".*([mM]onocyte)", dataBloodCells$cellName)]
)

# Keep every row from those tissues and build the TERM2GENE table.
# filter() is namespace-qualified like the other verbs in this pipeline, so
# it cannot be shadowed by another package's filter().
dataBloodCells <- dataBloodCells %>%
  dplyr::filter(tissueType %in% monocytesInfo) %>%
  tidyr::unite("cellMarker", tissueType, cancerType, cellName, sep = ", ") %>%
  dplyr::select(cellMarker, geneID) %>%
  dplyr::mutate(geneID = strsplit(geneID, ", "))
Con el subset no es capaz de sacar genesets significativos, ya que probablemente los genes que comparten sean muy parecidos entre todos los grupos.
# Same enrichment as for the full marker table, restricted to the
# monocyte-related subset.
typeCellsBlood <- enricher(
  gene = unique(allGenesDF$ENTREZID),
  TERM2GENE = dataBloodCells,
  pAdjustMethod = "fdr",
  minGSSize = 5,
  qvalueCutoff = 0.2
)

# Top 10 categories and the full result table.
dotplot(typeCellsBlood, font.size = 8, showCategory = 10)
barplot(typeCellsBlood, x = "GeneRatio", font.size = 8, showCategory = 10)
DT::datatable(typeCellsBlood@result)
Es la base de datos de GSEA, tienen un apartado específico para células inmunes. Está bien que las entradas salgan relacionadas con monocitos, ya que estamos sesgando mucho el análisis por hacer enriquecimiento en una base de datos donde solo hay células inmunes.
Link paper: https://www.cell.com/immunity/fulltext/S1074-7613(15)00532-4
# MSigDB category C7 gene sets for human, reduced to the two columns
# enricher() needs (gene-set name, Entrez gene ID).
geneImmSginature <- dplyr::select(
  msigdbr(species = "Homo sapiens", category = "C7"),
  gs_name, entrez_gene
)

# Enrichment of all annotated genes against those gene sets.
immCells <- enricher(
  gene = unique(allGenesDF$ENTREZID),
  TERM2GENE = geneImmSginature
)

dotplot(immCells, font.size = 6, showCategory = 10)
barplot(immCells, x = "GeneRatio", font.size = 6, showCategory = 10)
upsetplot(immCells)
# DisGeNET enrichment of all annotated genes; readable = TRUE reports gene
# symbols instead of Entrez IDs.
dso1 <- enrichDGN(gene = allGenesDF$ENTREZID, readable = TRUE)

dotplot(dso1, font.size = 8, showCategory = 20)
barplot(dso1, font.size = 8, showCategory = 20)
upsetplot(dso1)
# Disease Ontology enrichment of all annotated genes.
do1 <- enrichDO(gene = allGenesDF$ENTREZID, readable = TRUE)

dotplot(do1, font.size = 8, showCategory = 20)
barplot(do1, font.size = 8, showCategory = 20)
upsetplot(do1)
# KEGG pathway enrichment (organism code "hsa") of all annotated genes.
kegg1 <- enrichKEGG(organism = "hsa", gene = allGenesDF$ENTREZID)

dotplot(kegg1, font.size = 8, showCategory = 20)
barplot(kegg1, font.size = 8, showCategory = 20)
upsetplot(kegg1)
# Second KEGG enrichment via find_enriched_pathway(), using the unique
# Entrez IDs of all annotated genes.
pho_KEGGresult <- find_enriched_pathway(
  unique(allGenesDF$ENTREZID),
  species = "hsa"
)
# Show the first element of the returned list as an interactive table.
DT::datatable(pho_KEGGresult[[1]])
# Keep only annotations whose id starts with "promoter" and that carry a
# gene ID. filter() is namespace-qualified, consistent with the
# dplyr::select()/dplyr::mutate() calls elsewhere in this script, so it
# cannot be shadowed by another package's filter().
genesPromoter <- dataAnnotatr %>%
  dplyr::filter(
    grepl("^(promoter).*", annot.id),
    !is.na(annot.gene_id)
  )

# Gene table (Entrez ID + symbol) for the promoter annotations.
genesPromoterDF <- data.frame(
  ENTREZID = genesPromoter$annot.gene_id,
  SYMBOL = genesPromoter$annot.symbol
)
# Alternative kept for reference: map symbols to other ID types with bitr().
# genesPromoterDF <- bitr(genesPromoter, fromType = "SYMBOL",
#                         toType = c("ENSEMBL", "ENTREZID"),
#                         OrgDb = org.Hs.eg.db)
# clusterProfiler: GO over-representation for the promoter genes, one
# enrichGO() run per ontology.
# The result has to be called `ggo2`, because the plotting code reads
# `ggo2$MF`. The ontology is taken from the loop variable; with a fixed
# ont = "MF" the "CC" and "BP" entries would silently hold MF results.
# `simplify = FALSE` guarantees a list named by ontology.
ggo2 <- sapply(c("MF", "CC", "BP"), function(term) {
  enrichGO(
    gene = unique(genesPromoterDF$ENTREZID),
    OrgDb = org.Hs.eg.db,
    ont = term,
    qvalueCutoff = 0.05,
    readable = TRUE
  )
}, simplify = FALSE)
# Molecular Function (MF) enrichment of the promoter genes: top 20 terms as
# dot plot and bar plot, UpSet plot, GO graph, and the full result table.
dotplot(ggo2[["MF"]], font.size = 8, showCategory = 20)
barplot(ggo2[["MF"]], font.size = 8, showCategory = 20)
upsetplot(ggo2[["MF"]])
goplot(ggo2[["MF"]])
DT::datatable(ggo2[["MF"]]@result)
Para evitar la redundancia de los términos, el paquete te permite limpiar entradas redundantes. Pasa de 1014 a 91 términos enriquecidos.
# Remove redundant GO terms from the promoter MF result and redraw the
# top 20 terms.
ggo2MFsimplified <- simplify(ggo2[["MF"]])
dotplot(ggo2MFsimplified, font.size = 8, showCategory = 20)
barplot(ggo2MFsimplified, font.size = 8, showCategory = 20)
Ejemplo
# Semantic data for the human MF ontology.
hsGO <- godata("org.Hs.eg.db", ont = "MF")

# Pairwise gene comparison (measure "Wang", combined with "BMA") for the
# first 20 promoter gene IDs.
distan <- mgeneSim(
  head(genesPromoterDF$ENTREZID, 20),
  semData = hsGO,
  combine = "BMA",
  measure = "Wang"
)
(Salida de consola: barra de progreso del cálculo anterior, de 0% a 100%.)
# Heatmap of the gene-by-gene matrix returned by mgeneSim().
pheatmap::pheatmap(mat = distan)
https://rdrr.io/bioc/enrichplot/src/R/dotplot.R
# Cell-marker enrichment restricted to the promoter genes.
typeCellsPromoters <- enricher(
  gene = unique(genesPromoterDF$ENTREZID),
  TERM2GENE = dataHumanCells,
  pAdjustMethod = "fdr",
  minGSSize = 5,
  qvalueCutoff = 0.2
)

# Up to 50 categories per plot, followed by the full result table.
dotplot(typeCellsPromoters, font.size = 8, showCategory = 50)
barplot(typeCellsPromoters, font.size = 8, showCategory = 50)
DT::datatable(typeCellsPromoters@result)
# DisGeNET enrichment of the promoter genes, reported with gene symbols.
dso2 <- enrichDGN(gene = genesPromoterDF$ENTREZID, readable = TRUE)

dotplot(dso2, font.size = 8, showCategory = 20)
barplot(dso2, font.size = 8, showCategory = 20)
upsetplot(dso2)

# Publication trend (2012-2019) for the first enriched disease descriptions.
enrichplot::pmcplot(query = head(dso2$Description), period = 2012:2019)